library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# NOTE(review): absolute, machine-specific working directory. The relative
# paths used further down (e.g. "Operetta/raw_data/...") are resolved against
# it, so the script only runs as-is on a machine with this directory layout.
setwd("/home/rguerillot/Documents/Travail/Abdou_project/Staph_infection_project/github_analysis/VANANZ_phenotypes")

# import cell count operetta data
#' Read one tab-separated file and tag every row with its source path.
#'
#' @param flnm Path of the file to read.
#' @return The parsed table with an extra `filename` column set to `flnm`.
read_tsv_filename <- function(flnm) {
  # The first 9 lines are skipped before the header row is read
  # (presumably the Operetta export preamble -- confirm against a raw file).
  parsed <- read_tsv(flnm, skip = 9, trim_ws = TRUE)
  mutate(parsed, filename = flnm)
}

# Read every file under the plate directory whose name matches "*Spots" and
# stack the per-file tables into a single data frame.
# NOTE(review): `pattern` is a regular expression, not a glob, and it is not
# anchored to ".tsv". The recorded reader messages mention zip archives, so
# non-TSV files appear to be matched as well; the stray `X1` column dropped
# later seems to come from them -- confirm before tightening the pattern.
Spot_count.df <- list.files(
  path = "Operetta/raw_data/210921 THP1 n2/",
  pattern = "*Spots",
  full.names = TRUE,
  recursive = TRUE
) %>%
  map_df(read_tsv_filename)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   `Cell Type` = col_character(),
##   Strain = col_character(),
##   `Bounding Box` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## ! Multiple files in zip: reading ''_rels/.rels''
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   `Cell Type` = col_character(),
##   Strain = col_character(),
##   `Bounding Box` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   `Cell Type` = col_character(),
##   Strain = col_character(),
##   `Bounding Box` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character()
## )
# List the distinct source files that contributed rows to the Spots table.
unique(Spot_count.df[["filename"]])
## [1] "Operetta/raw_data/210921 THP1 n2//210921_THP1_1.5h_Objects_Population - Spots.tsv"
## [2] "Operetta/raw_data/210921 THP1 n2//210921_THP1_24h_Objects_Population - Spots.tsv" 
## [3] "Operetta/raw_data/210921 THP1 n2//210921_THP1_5h_Objects_Population - Spots.tsv"
# Tidy Operetta combined dataframe

#names(Spot_count.df) <- gsub(" ", "_", names(Spot_count.df))
#names(Spot_count.df) <- gsub("Cells_-_", "", names(Spot_count.df))
# Derive experiment metadata from the file path and harmonise the labels.
Spot_count_clean.df <- Spot_count.df %>%
  # Path pieces a-d are throwaway helpers; the fifth piece is the file name.
  separate(col = filename, sep = "/", into = c("a", "b", "c", "d", "experiment_id")) %>%
  # File-name pieces: plate date, cell type, time point and two unused tails.
  separate(
    col = experiment_id,
    sep = "_",
    into = c("plate_date", "cell_type", "timepoint", "e", "f"),
    remove = FALSE
  ) %>%
  # Drop every helper column, including `c`, as the Cells pipeline does.
  select(-a, -b, -c, -d, -e, -f, -X1, -Timepoint) %>%
  mutate(
    # Row is turned into text so it can be joined to the row-letter lookup.
    Row = as.character(Row),
    # Normalise capitalisation so labels match the factor levels set later.
    Strain = ifelse(Strain == "Non-infected", yes = "non-infected", no = as.character(Strain)),
    `Cell Type` = ifelse(`Cell Type` == "THP1 Casp1-/-", yes = "THP1 casp1-/-", no = as.character(`Cell Type`))
  )
  

# Lookup table from plate row number (stored as text) to row letter A-H.
# Built with tibble(); data_frame() is deprecated.
raw_to_ABC.df <- tibble(
  Row = as.character(1:8),
  row = LETTERS[1:8]
)
# Replace the numeric plate row by its letter, build the well label
# (e.g. "A01") and a per-well sample identifier.
Spot_count_clean.df <- Spot_count_clean.df %>%
  # Join key stated explicitly instead of relying on the natural-join guess.
  left_join(raw_to_ABC.df, by = "Row") %>%
  select(-Row) %>%
  select(Row = row, Column, everything()) %>%
  mutate(
    Column = str_pad(Column, 2, pad = "0"),
    Well = paste0(Row, Column)
  ) %>%
  select(Well, everything(), -Replicate) %>%
  mutate(sample_id = paste(experiment_id, Well, `Cell Type`, Strain, sep = "#"))
# create df of replicate
# One row per well; wells sharing experiment, cell type and strain are
# numbered 1, 2, ... in order of appearance to give a replicate index.
sample_replicate_df <- Spot_count_clean.df %>%
  distinct(experiment_id, Well, `Cell Type`, Strain) %>%
  group_by(experiment_id, `Cell Type`, Strain) %>%
  mutate(replicate = seq_len(n())) %>%
  ungroup()

# merge with clean data
# Attach the replicate index to every spot, then fix the display order of
# time points, strains and cell types by turning them into factors.
Spot_count_clean.df <- Spot_count_clean.df %>%
  merge(sample_replicate_df, by = c("experiment_id", "Well", "Cell Type", "Strain")) %>%
  mutate(
    timepoint = factor(timepoint, levels = c("1.5h", "5h", "24h")),
    Strain = factor(Strain, levels = c("WT", "agrA", "non-infected")),
    `Cell Type` = factor(`Cell Type`, levels = c("THP1 (Cas9)", "THP1 casp1-/-", "THP1 casp-4/5 -/-"))
  )
  
  



# count bacteria per field
# Number of spot rows (one row per detected spot) in each imaged field.
bac_count_field <- Spot_count_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field) %>%
  count(name = "number of bacteria/field") %>%
  # Do not leave the summary table grouped for downstream verbs.
  ungroup()

# Bacteria per field by strain, dodged by cell type, one panel per time point.
# Outliers are hidden on the boxes because every field is drawn as a point.
ggplot(
  data = bac_count_field,
  mapping = aes(x = Strain, y = `number of bacteria/field`, fill = `Cell Type`)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
    alpha = 0.3
  ) +
  # alternative point layer: geom_jitter(width = .2, alpha = .3)
  facet_grid(~timepoint)

# count infected cells
# Number of distinct cell object numbers carrying at least one spot, per field.
bac_count_infected <- Spot_count_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field) %>%
  distinct(`Spots - Object No in Cells`) %>%
  count(name = "number of infected cells/field") %>%
  # Do not leave the summary table grouped for downstream verbs.
  ungroup()


# count bacteria per cell
# Two-step tally per field:
#   1. spots per cell object  -> "number of bacteria/infected cell"
#   2. cells per spot count   -> "number of infected cells"
bac_count_cell <- Spot_count_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field, `Spots - Object No in Cells`) %>%
  count(name = "number of bacteria/infected cell") %>%
  ungroup() %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field, `number of bacteria/infected cell`) %>%
  count(name = "number of infected cells") %>%
  # Do not leave the summary table grouped for downstream verbs.
  ungroup()
  

# Distribution of bacteria per infected cell, boxes dodged by cell type,
# strains in rows and time points in columns. xlim() discards values outside
# 0-7, which is what triggers the "Removed ... rows" warnings.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = `number of infected cells`,
    fill = `Cell Type`,
    group = interaction(`Cell Type`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument that sat between `alpha` and `colour` is removed.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(Strain ~ timepoint) +
  xlim(0, 7)
## Warning: Removed 11 rows containing missing values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 15 rows containing missing values (geom_point).

# Same distribution, boxes dodged by strain, cell types in rows and time
# points in columns. xlim() discards values outside 0-7.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = `number of infected cells`,
    fill = `Strain`,
    group = interaction(`Strain`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument that sat between `alpha` and `colour` is removed.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(`Cell Type` ~ timepoint) +
  xlim(0, 7)
## Warning: Removed 11 rows containing missing values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 19 rows containing missing values (geom_point).

# Plot as specified by Abdou


#' Boxplot of bacteria per field over time.
#'
#' Boxes are dodged by cell type, one panel per strain; every field is also
#' drawn as a jittered point, so box outliers are hidden.
#'
#' @param data Per-field counts with columns `timepoint`,
#'   `number of bacteria/field`, `Cell Type` and `Strain`.
#' @return A ggplot object.
plot_bacteria_per_field <- function(data) {
  ggplot(data, aes(x = timepoint, y = `number of bacteria/field`, fill = `Cell Type`)) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(
      position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
      alpha = 0.3
    ) +
    # alternative point layer: geom_jitter(width = .2, alpha = .3)
    facet_grid(~Strain) +
    ylab("intracellular bacteria per field") +
    theme_bw()
}

# All cell types.
p1 <- plot_bacteria_per_field(bac_count_field)
p1

# Without the casp-4/5 knock-out.
p2 <- bac_count_field %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_bacteria_per_field()
p2

# Without the casp1 knock-out.
p3 <- bac_count_field %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_bacteria_per_field()
p3

#' Boxplot of the number of infected cells per bacterial load.
#'
#' Boxes are dodged by cell type, strains in rows and time points in columns.
#' The x variable is numeric, so a continuous scale with integer breaks 1-10
#' is used; the previous `scale_x_discrete(limits = <numeric>)` call raised
#' "Continuous limits supplied to discrete scale". `expand_limits()` keeps the
#' 1-10 range on the axis without discarding any observation.
#'
#' @param data Table with columns `number of bacteria/infected cell`,
#'   `number of infected cells`, `Cell Type`, `Strain` and `timepoint`.
#' @param show_points Overlay the individual observations as jittered points?
#' @return A ggplot object.
plot_bacteria_per_cell <- function(data, show_points = TRUE) {
  p <- ggplot(
    data,
    aes(
      x = `number of bacteria/infected cell`,
      y = `number of infected cells`,
      fill = `Cell Type`,
      group = interaction(`Cell Type`, `number of bacteria/infected cell`)
    )
  ) +
    geom_boxplot(outlier.shape = NA, position = "dodge2")

  if (show_points) {
    p <- p +
      geom_point(
        position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
        alpha = 0.3,
        colour = "black"
      )
  }

  p +
    facet_grid(Strain ~ timepoint) +
    theme_bw() +
    scale_x_continuous(breaks = 1:10, minor_breaks = NULL) +
    expand_limits(x = c(1, 10))
}

# All cell types / without casp-4/5 knock-out / without casp1 knock-out.
p4 <- plot_bacteria_per_cell(bac_count_cell)
p4

p5 <- bac_count_cell %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_bacteria_per_cell()
p5

p6 <- bac_count_cell %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_bacteria_per_cell()
p6

# Same three plots without the individual points.

p7 <- plot_bacteria_per_cell(bac_count_cell, show_points = FALSE)
p7

p8 <- bac_count_cell %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_bacteria_per_cell(show_points = FALSE)
p8

p9 <- bac_count_cell %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_bacteria_per_cell(show_points = FALSE)
p9

# Plot total number of infected cells

#' Boxplot of infected cells per field over time.
#'
#' Boxes are dodged by cell type, one panel per strain; every field is also
#' drawn as a jittered point, so box outliers are hidden.
#'
#' @param data Per-field counts with columns `timepoint`,
#'   `number of infected cells/field`, `Cell Type` and `Strain`.
#' @return A ggplot object.
plot_infected_per_field <- function(data) {
  ggplot(data, aes(x = timepoint, y = `number of infected cells/field`, fill = `Cell Type`)) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(
      position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
      alpha = 0.3
    ) +
    # alternative point layer: geom_jitter(width = .2, alpha = .3)
    facet_grid(~Strain) +
    ylab("infected cells per field") +
    theme_bw()
}

# All cell types.
p10 <- plot_infected_per_field(bac_count_infected)
p10

# Without the casp-4/5 knock-out.
p11 <- bac_count_infected %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_infected_per_field()
p11

# Without the casp1 knock-out.
p12 <- bac_count_infected %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_infected_per_field()
p12

# note: the number of infected cells increases for agrA between 5h and 24h -> fewer cells seeded on the 5h plate?? => need to correct by the total number of cells

# Plot total number of cells and percentage of infected cells ----

# Read every file under the plate directory whose name matches "*Cells" and
# stack the per-file tables into a single data frame.
# NOTE(review): `pattern` is a regular expression, not a glob, and it is not
# anchored to ".tsv". The recorded reader messages mention zip archives, so
# non-TSV files appear to be matched as well -- confirm before tightening it.
Cell_count.df <- list.files(
  path = "Operetta/raw_data/210921 THP1 n2/",
  pattern = "*Cells",
  full.names = TRUE,
  recursive = TRUE
) %>%
  map_df(read_tsv_filename)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Cell Type` = col_character(),
##   Strain = col_character(),
##   Replicate = col_double(),
##   Row = col_double(),
##   Column = col_double(),
##   Plane = col_double(),
##   Timepoint = col_double(),
##   Field = col_double(),
##   `Object No` = col_double(),
##   X = col_double(),
##   Y = col_double(),
##   `Bounding Box` = col_character(),
##   `Cells - Total Spot Area` = col_double(),
##   `Cells - Relative Spot Intensity` = col_double(),
##   `Cells - Number of Spots` = col_double(),
##   `Cells - Number of Spots per Area of Cell` = col_double(),
##   `Cells - Spots per Cell Mean` = col_double()
## )
## 
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Cell Type` = col_character(),
##   Strain = col_character(),
##   Replicate = col_double(),
##   Row = col_double(),
##   Column = col_double(),
##   Plane = col_double(),
##   Field = col_double(),
##   `Object No` = col_double(),
##   X = col_double(),
##   Y = col_double(),
##   `Bounding Box` = col_character(),
##   `Cells - Total Spot Area` = col_double(),
##   `Cells - Relative Spot Intensity` = col_double(),
##   `Cells - Number of Spots` = col_double(),
##   `Cells - Number of Spots per Area of Cell` = col_double(),
##   `Cells - Spots per Cell Mean` = col_double()
## )
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Cell Type` = col_character(),
##   Strain = col_character(),
##   Replicate = col_double(),
##   Row = col_double(),
##   Column = col_double(),
##   Plane = col_double(),
##   Field = col_double(),
##   `Object No` = col_double(),
##   X = col_double(),
##   Y = col_double(),
##   `Bounding Box` = col_character(),
##   `Cells - Total Spot Area` = col_double(),
##   `Cells - Relative Spot Intensity` = col_double(),
##   `Cells - Number of Spots` = col_double(),
##   `Cells - Number of Spots per Area of Cell` = col_double(),
##   `Cells - Spots per Cell Mean` = col_double()
## )
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character()
## )
# List the source files of the Cells table that was just loaded. This call
# previously inspected Spot_count.df again (copy-paste slip), so the recorded
# output listed the Spots files and has been dropped.
unique(Cell_count.df$filename)
# Derive experiment metadata from the file path, harmonise the labels and flag
# every cell as infected or not.
Cell_count_clean.df <- Cell_count.df %>%
  # Path pieces a-d are throwaway helpers; the fifth piece is the file name.
  separate(col = filename, sep = "/", into = c("a", "b", "c", "d", "experiment_id")) %>%
  # File-name pieces: plate date, cell type, time point and two unused tails.
  separate(
    col = experiment_id,
    sep = "_",
    into = c("plate_date", "cell_type", "timepoint", "e", "f"),
    remove = FALSE
  ) %>%
  select(-a, -b, -c, -d, -e, -f, -X1, -Timepoint) %>%
  mutate(
    Row = as.character(Row),
    # Normalise capitalisation so labels match the factor levels below.
    Strain = ifelse(Strain == "Non-infected", yes = "non-infected", no = as.character(Strain)),
    `Cell Type` = ifelse(`Cell Type` == "THP1 Casp1-/-", yes = "THP1 casp1-/-", no = as.character(`Cell Type`)),
    # A cell counts as infected as soon as one spot was detected in it.
    Cells = ifelse(`Cells - Number of Spots` > 0, yes = "infected cells", no = "non-infected cells"),
    # Factors fix the display order in the plots.
    timepoint = factor(timepoint, levels = c("1.5h", "5h", "24h")),
    Strain = factor(Strain, levels = c("WT", "agrA", "non-infected")),
    `Cell Type` = factor(`Cell Type`, levels = c("THP1 (Cas9)", "THP1 casp1-/-", "THP1 casp-4/5 -/-"))
  )



# All cells per imaged field.
total_cells_per_field <- Cell_count_clean.df %>%
  count(timepoint, Row, Column, `Cell Type`, Strain, Replicate, Field, name = "number of cells")

# Cells with at least one spot per imaged field; fields without any infected
# cell are absent from this table.
infected_cells_per_field <- Cell_count_clean.df %>%
  filter(Cells == "infected cells") %>%
  count(timepoint, Row, Column, `Cell Type`, Strain, Replicate, Field, name = "number of infected cells")

# Keep every field (all.x = TRUE); a field missing from the infected table
# has zero infected cells, so its NA is replaced by 0 before the percentage.
pct_infected_cells_per_field <- merge(
  total_cells_per_field,
  infected_cells_per_field,
  by = c("timepoint", "Row", "Column", "Cell Type", "Strain", "Replicate", "Field"),
  all.x = TRUE
) %>%
  mutate(
    `number of infected cells` = coalesce(`number of infected cells`, 0L),
    `% of infected cells` = (`number of infected cells` / `number of cells`) * 100
  )


#' Boxplot of one per-field measurement over time.
#'
#' Boxes are dodged by cell type, one panel per strain; every field is also
#' drawn as a jittered point, so box outliers are hidden.
#'
#' @param data Per-field table with columns `timepoint`, `Cell Type`, `Strain`
#'   and the column named by `y_col`.
#' @param y_col Name (string) of the column plotted on the y axis; it is also
#'   used as the axis title.
#' @return A ggplot object.
plot_cells_per_field <- function(data, y_col) {
  ggplot(data, aes(x = timepoint, y = .data[[y_col]], fill = `Cell Type`)) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(
      position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
      alpha = 0.3
    ) +
    # alternative point layer: geom_jitter(width = .2, alpha = .3)
    facet_grid(~Strain) +
    # Keep the column name as axis title, as an unquoted aes() would.
    ylab(y_col) +
    theme_bw()
}

# All cell types.
p13 <- plot_cells_per_field(pct_infected_cells_per_field, "number of cells")
p13

p14 <- plot_cells_per_field(pct_infected_cells_per_field, "number of infected cells")
p14

p15 <- plot_cells_per_field(pct_infected_cells_per_field, "% of infected cells")
p15

# Without the casp-4/5 knock-out.
p16 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_cells_per_field("number of cells")
p16

p17 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_cells_per_field("number of infected cells")
p17

p18 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-") %>%
  plot_cells_per_field("% of infected cells")
p18

# Without the casp1 knock-out.
p19 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_cells_per_field("number of cells")
p19

p20 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_cells_per_field("number of infected cells")
p20

p21 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp1-/-") %>%
  plot_cells_per_field("% of infected cells")
p21

# check signals differences infected vs non-infected and try to remove background bact. counts in non-infected ----
# Long table (key = measurement name, value = measurement) of all "Spots ..."
# columns for the non-infected wells.
ninf.df <- Spot_count_clean.df %>%
  filter(Strain == "non-infected") %>%
  select(starts_with("Spots")) %>%
  gather(key = "key", value = "value") %>%
  mutate(Strain = "non-infected")

# Same long table for every well whose strain is not "non-infected".
inf.df <- Spot_count_clean.df %>%
  filter(Strain != "non-infected") %>%
  select(starts_with("Spots")) %>%
  gather(key = "key", value = "value") %>%
  mutate(Strain = "infected")

# Stack both tables and discard missing measurements.
ninf_inf.df <- ninf.df %>%
  rbind(inf.df) %>%
  filter(!is.na(value))

# List the spot measurements available for comparison.
unique(ninf_inf.df[["key"]])
##  [1] "Spots - Relative Spot  Intensity"        
##  [2] "Spots - Corrected Spot  Intensity"       
##  [3] "Spots - Uncorrected Spot  Peak Intensity"
##  [4] "Spots - Spot Contrast"                   
##  [5] "Spots - Spot Background Intensity"       
##  [6] "Spots - Spot Area [px²]"                 
##  [7] "Spots - Region Intensity"                
##  [8] "Spots - Spot To Region Intensity"        
##  [9] "Spots - Object No in Cells"              
## [10] "Spots - Spot Area [px²]"
# Density of each spot measurement, infected vs non-infected wells, one
# free-scaled panel per measurement.
ggplot(data = ninf_inf.df, mapping = aes(x = value, colour = Strain)) +
  geom_density() +
  facet_wrap(~key, scales = "free")

# Same densities restricted to values between 0 and 100; values outside that
# range are discarded, hence the "Removed ... rows" warning.
ggplot(data = ninf_inf.df, mapping = aes(x = value, colour = Strain)) +
  geom_density() +
  facet_wrap(~key, scales = "free") +
  xlim(0, 100)
## Warning: Removed 18859 rows containing non-finite values (stat_density).

# Based on the distributions, background spots/bacteria might be removed by keeping only spots with an area < 37.5

# Keep only the spots whose area is strictly below 37.5.
Spot_count_clean_clean.df <- filter(
  Spot_count_clean.df,
  `Spots - Spot Area [px²]` < 37.5
)

# Count and re plot after filter
# Number of spot rows per imaged field after the area filter
# (overwrites the unfiltered bac_count_field).
bac_count_field <- Spot_count_clean_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field) %>%
  count(name = "number of bacteria/field") %>%
  # Do not leave the summary table grouped for downstream verbs.
  ungroup()

# Bacteria per field by strain after filtering, dodged by cell type, one
# panel per time point. Outliers are hidden because every field is a point.
ggplot(
  data = bac_count_field,
  mapping = aes(x = Strain, y = `number of bacteria/field`, fill = `Cell Type`)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
    alpha = 0.3
  ) +
  # alternative point layer: geom_jitter(width = .2, alpha = .3)
  facet_grid(~timepoint)

# count bacteria per cell
# Two-step tally per field after the area filter (overwrites the unfiltered
# bac_count_cell):
#   1. spots per cell object -> "number of bacteria/infected cell"
#   2. cells per spot count  -> `n` (default count() name, used by the plots)
bac_count_cell <- Spot_count_clean_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field, `Spots - Object No in Cells`) %>%
  count(name = "number of bacteria/infected cell") %>%
  ungroup() %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field, `number of bacteria/infected cell`) %>%
  count() %>%
  # Do not leave the summary table grouped for downstream verbs.
  ungroup()


# Distribution of bacteria per infected cell after filtering, boxes dodged by
# cell type, strains in rows and time points in columns. xlim() discards
# values outside 0-7.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = n,
    fill = `Cell Type`,
    group = interaction(`Cell Type`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument that sat between `alpha` and `colour` is removed.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(Strain ~ timepoint) +
  xlim(0, 7)
## Warning: Removed 4 rows containing missing values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 5 rows containing missing values (geom_point).

# Same distribution after filtering, boxes dodged by strain, cell types in
# rows and time points in columns.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = n,
    fill = `Strain`,
    group = interaction(`Strain`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(`Cell Type` ~ timepoint) +
  # xlim() is now chained onto the plot. The `+` after facet_grid() was
  # missing, so the limit was evaluated as a separate statement and only the
  # scale object was printed.
  xlim(0, 7)
# Note: this doesn't work, it removes most of the data -> background needs to be filtered out during the image-processing steps